from fastai.vision.all import *
Download the dataset: we will be looking at the Oxford-IIIT PETs dataset
# Download & extract the Oxford-IIIT Pet dataset; `path` is the local root folder.
path = untar_data(URLs.PETS)
# Recursively collect every image file path under images/.
files = get_image_files(path/"images")
We need to tell our model the labels:
Inside the PETS dataset, cat image filenames start with an uppercase letter. So we write label_func():
def label_func(f):
    """Return True when `f` names a cat image, False for a dog.

    In the PETS dataset, cat breeds are capitalized, so the label is simply
    whether the first character of the filename is uppercase.
    """
    first_char = f[0]
    return first_char.isupper()
xDataLoaders: x can be any application supported by fastai (Image, Text, Tabular, ...)
dls = ImageDataLoaders.from_name_func(path, files, label_func, item_tfms=Resize(224))
Let's take a look at the first few images:
# Display a sample batch of images together with their labels.
dls.show_batch()
Dogs >>>Cats. I said it 😁
Let's create our Machine Learning Model! More accurately, a CNN.
We'll use a cnn_learner for our task
# Transfer learning: an ImageNet-pretrained ResNet-34, tracking error rate.
learn = cnn_learner(dls, resnet34, metrics=error_rate)
# fine_tune: one frozen epoch on the head, then one epoch with all layers unfrozen.
learn.fine_tune(1)
# Show model predictions next to ground-truth labels for a sample batch.
learn.show_results()
# Mid-level DataBlock API for the same task: images in, one category out,
# random train/valid split, label parsed out of the filename with a regex.
pets = DataBlock(blocks=(ImageBlock, CategoryBlock),
                 get_items=get_image_files,
                 splitter=RandomSplitter(),
                 # FIX: escape the dot so the pattern matches a literal ".jpg"
                 # (the previous `.jpg` let the dot match any character).
                 get_y=using_attr(RegexLabeller(r'(.+)_\d+\.jpg$'), 'name'),
                 # Resize per item on CPU first, then augment batches at size 224.
                 item_tfms=Resize(460),
                 batch_tfms=aug_transforms(size=224))
Now we're ready to load all of these into a dataloader and show the first batch
# Materialize DataLoaders from the DataBlock template and preview a batch.
dls = pets.dataloaders(untar_data(URLs.PETS)/"images")
dls.show_batch(max_n=9)
The original task of the dataset is object detection; let's try to use it for a multi-label image classification example
First, let's look at the high-level API:
# PASCAL VOC 2007: the CSV lists space-separated (multi-)labels per image.
path = untar_data(URLs.PASCAL_2007)
path.ls()
df = pd.read_csv(path/'train.csv')
df.head()
# High-level API: DataLoaders straight from the dataframe.
# `valid_col` selects validation rows; `label_delim` splits the multi-labels.
dls = ImageDataLoaders.from_df(df, path, folder='train', valid_col='is_valid', label_delim=' ',
item_tfms=Resize(460), batch_tfms=aug_transforms(size=224))
dls.show_batch()
# Multi-label classification needs accuracy_multi with a probability threshold.
learn = cnn_learner(dls, resnet50, metrics=partial(accuracy_multi, thresh=0.5))
learn.fine_tune(4, 3e-2)
learn.show_results()
# Predict the label set for a single image file.
learn.predict(path/'train/000005.jpg')
# Inspect the items the model got most wrong (highest loss).
interp = Interpretation.from_learner(learn)
interp.plot_top_losses(9)
Let's Recap:
The DataBlock API needs:
# DataBlock version: MultiCategoryBlock one-hot encodes each image's label set.
pascal = DataBlock(blocks=(ImageBlock, MultiCategoryBlock),
# Split train/valid on the boolean 'is_valid' column.
splitter=ColSplitter('is_valid'),
# Build the full image path from the 'fname' column, prefixed with the train folder.
get_x=ColReader('fname', pref=str(path/'train') + os.path.sep),
# Space-separated multi-labels come from the 'labels' column.
get_y=ColReader('labels', label_delim=' '),
item_tfms = Resize(460),
batch_tfms=aug_transforms(size=224))
dls = pascal.dataloaders(df)
dls.show_batch(max_n=9)
The goal of showcasing this is to demonstrate the similarity between different applications
from fastai.text.all import *
Loading the IMDB Dataset, we'll try to classify Movie-reviews
# IMDB movie-review sentiment dataset.
path = untar_data(URLs.IMDB)
path.ls()
xDataLoaders again — this time x = Text, i.e. TextDataLoaders
# Folder-based loading: one subfolder per class; the 'test' folder is used as validation.
dls = TextDataLoaders.from_folder(untar_data(URLs.IMDB), valid='test')
We'll need to create a Learner for Text Classification, aptly named text_classifier_learner
# AWD-LSTM text classifier; drop_mult scales all of the model's dropout rates.
learn = text_classifier_learner(dls, AWD_LSTM, drop_mult=0.5, metrics=accuracy)
learn.fine_tune(4, 1e-2)
# NOTE(review): this second identical fine_tune trains 4 more epochs —
# looks like an accidental duplicate line; confirm it is intended.
learn.fine_tune(4, 1e-2)
Surprisingly, this model isn't very bad!
# Classify a single raw review string.
learn.predict("I really liked that movie!")
# DataBlock equivalent: TextBlock handles tokenization and numericalization.
imdb = DataBlock(blocks=(TextBlock.from_folder(path), CategoryBlock),
get_items=get_text_files,
# The label is the name of the parent folder (pos/neg).
get_y=parent_label,
# Everything under the 'test' grandparent folder becomes the validation set.
splitter=GrandparentSplitter(valid_name='test'))
dls = imdb.dataloaders(path)
Let's look at a Tabular Dataset to further extrapolate the similarities
from fastai.tabular.all import *
# Adult census dataset: predict the 'salary' bracket from demographic columns.
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')
# High-level API: declare categorical/continuous columns and preprocessing steps.
dls = TabularDataLoaders.from_csv(path/'adult.csv', path=path, y_names="salary",
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race'],
cont_names = ['age', 'fnlwgt', 'education-num'],
procs = [Categorify, FillMissing, Normalize])
# Mid-level equivalent: TabularPandas with an explicit 80/20 random split.
splits = RandomSplitter(valid_pct=0.2)(range_of(df))
to = TabularPandas(df, procs=[Categorify, FillMissing,Normalize],
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race'],
cont_names = ['age', 'fnlwgt', 'education-num'],
y_names='salary',
splits=splits)
dls = to.dataloaders(bs=64)
learn = tabular_learner(dls, metrics=accuracy)
learn.fit_one_cycle(1)
GPU Transforms:
In this walkthrough, we will look at two images:
Let's load the Doggy image into memory and take a look at it:
# Example images used throughout the augmentation walkthrough.
path = untar_data(URLs.PETS)
DOGGY = path/'images/beagle_1.jpg'
DOGGY
# Three local images (med.jpg, sat.jpg, box.jpg) expected in the working directory
# — presumably medical / satellite / boxed-text examples; confirm with the files.
_MED = Path("./")
MED = _MED/"med.jpg"
_SAT = Path("./")
SAT = _SAT/"sat.jpg"
_BOX = Path("./")
BOX = _BOX/"box.jpg"
# Load each image and standardize to 600x400 so side-by-side panels line up.
img = PILImage(PILImage.create(DOGGY).resize((600,400)))
med = PILImage(PILImage.create(MED).resize((600,400)))
sat = PILImage(PILImage.create(SAT).resize((600,400)))
box = PILImage(PILImage.create(BOX).resize((600,400)))
# Horizontal flip, shown side by side with the original.
_,axs = subplots(1,2)
show_image(img, ctx=axs[0], title='original')
show_image(img.flip_lr(), ctx=axs[1], title='flipped');
_,axs = subplots(1,2)
show_image(sat, ctx=axs[0], title='original')
show_image(sat.flip_lr(), ctx=axs[1], title='flipped');
# DihedralItem (p=1) draws one of the 8 flips/rotations at random on every call.
_,axs = subplots(2, 4)
for ax in axs.flatten():
show_image(DihedralItem(p=1.)(img, split_idx=0), ctx=ax)
_,axs = subplots(2, 4)
for ax in axs.flatten():
show_image(DihedralItem(p=1.)(sat, split_idx=0), ctx=ax)
# crop_pad: crops when the target is smaller than the source, pads when larger.
_,axs = plt.subplots(1,3,figsize=(12,4))
for ax,sz in zip(axs.flatten(), [300, 500, 700]):
show_image(img.crop_pad(sz), ctx=ax, title=f'Size {sz}');
_,axs = plt.subplots(1,3,figsize=(12,4))
for ax,sz in zip(axs.flatten(), [300, 500, 700]):
show_image(med.crop_pad(sz), ctx=ax, title=f'Size {sz}');
# The three padding modes: zero-fill, replicate border pixels, mirror-reflect.
_,axs = plt.subplots(1,3,figsize=(12,4))
for ax,mode in zip(axs.flatten(), [PadMode.Zeros, PadMode.Border, PadMode.Reflection]):
show_image(img.crop_pad((600,700), pad_mode=mode), ctx=ax, title=mode);
_,axs = plt.subplots(1,3,figsize=(12,4))
for ax,mode in zip(axs.flatten(), [PadMode.Zeros, PadMode.Border, PadMode.Reflection]):
show_image(med.crop_pad((600,700), pad_mode=mode), ctx=ax, title=mode);
# RandomCrop picks a different 200x200 window each call.
_,axs = plt.subplots(1,3,figsize=(12,4))
f = RandomCrop(200)
for ax in axs: show_image(f(img), ctx=ax);
_,axs = plt.subplots(1,3,figsize=(12,4))
f = RandomCrop(200)
for ax in axs: show_image(f(med), ctx=ax);
# Resize supports three methods: squish, pad, crop.
_,axs = plt.subplots(1,3,figsize=(12,4))
for ax,method in zip(axs.flatten(), [ResizeMethod.Squish, ResizeMethod.Pad, ResizeMethod.Crop]):
rsz = Resize(256, method=method)
show_image(rsz(img, split_idx=0), ctx=ax, title=method);
Let's take an Image Search Engine Example:
# Same three resize methods on the box image.
_,axs = plt.subplots(1,3,figsize=(12,4))
for ax,method in zip(axs.flatten(), [ResizeMethod.Squish, ResizeMethod.Pad, ResizeMethod.Crop]):
rsz = Resize(256, method=method)
show_image(rsz(box, split_idx=0), ctx=ax, title=method);
# RandomResizedCrop: a random sub-region rescaled to 256, a fresh draw per call.
crop = RandomResizedCrop(256)
_,axs = plt.subplots(3,3,figsize=(9,9))
for ax in axs.flatten():
cropped = crop(img)
show_image(cropped, ctx=ax);
Let's look at the OCR Example:
# RandomResizedCrop again, this time on the med image.
crop = RandomResizedCrop(256)
_,axs = plt.subplots(3,3,figsize=(9,9))
for ax in axs.flatten():
cropped = crop(med)
show_image(cropped, ctx=ax);
# RatioResize scales the largest side to 1024 while preserving aspect ratio.
RatioResize(1024)(img)
# Convert each PIL image to a float CHW tensor in [0,1] for the batch (GPU) transforms.
timg = TensorImage(array(img)).permute(2,0,1).float()/255.
# Fake a batch: `bs` identical copies of the pet image (and likewise below).
def _batch_ex(bs): return TensorImage(timg[None].expand(bs, *timg.shape).clone())
timg_sat = TensorImage(array(sat)).permute(2,0,1).float()/255.
def sat_batch_ex(bs): return TensorImage(timg_sat[None].expand(bs, *timg_sat.shape).clone())
timg_med = TensorImage(array(med)).permute(2,0,1).float()/255.
def med_batch_ex(bs): return TensorImage(timg_med[None].expand(bs, *timg_med.shape).clone())
timg_box = TensorImage(array(box)).permute(2,0,1).float()/255.
def box_batch_ex(bs): return TensorImage(timg_box[None].expand(bs, *timg_box.shape).clone())
# RandomResizedCropGPU: batch-level random crop+resize applied on tensors.
t = _batch_ex(8)
rrc = RandomResizedCropGPU(224, p=1.)
y = rrc(t)
_,axs = plt.subplots(2,4, figsize=(12,6))
# FIX: display each element of the batch — the original showed y[2] in all 8 axes.
for i,ax in enumerate(axs.flatten()):
    show_image(y[i], ctx=ax)
# flip_mat returns a batch of affine matrices; entry [0,0] is ±1 (flipped or not),
# so over 100 random draws both values should appear.
x = flip_mat(torch.randn(100,4,3))
test_eq(set(x[:,0,0].numpy()), {-1,1}) #might fail with probability 2*2**(-100) (picked only 1s or -1s)
# DeterministicFlip alternates no-flip / flip on successive calls.
# FIX: dropped the dead `DeterministicFlip({'p':.3})` assignment — it was
# immediately overwritten below, and passed a dict positionally instead of p=.
t = _batch_ex(8)
dih = DeterministicFlip()
_,axs = plt.subplots(2,4, figsize=(12,6))
for i,ax in enumerate(axs.flatten()):
    y = dih(t)
    show_image(y[0], ctx=ax, title=f'Call {i}')
# Same deterministic flip sequence on the box image batch.
t_ = box_batch_ex(8)
dih = DeterministicFlip()
_,axs = plt.subplots(2,4, figsize=(12,6))
for i,ax in enumerate(axs.flatten()):
y = dih(t_)
show_image(y[0], ctx=ax, title=f'Call {i}')
# Dihedral with draw=0..7 applies each of the 8 dihedral transforms across the batch.
t = _batch_ex(8)
dih = Dihedral(p=1., draw=list(range(8)))
y = dih(t)
# NOTE(review): the assignment above is immediately overwritten — the Transform
# call and the .dihedral_batch method appear to be two spellings of the same op;
# confirm before removing either.
y = t.dihedral_batch(p=1., draw=list(range(8)))
_,axs = plt.subplots(2,4, figsize=(12,5))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax, title=f'Flip {i}')
# The 8 dihedral transforms again, on the sat image batch.
t = sat_batch_ex(8)
dih = Dihedral(p=1., draw=list(range(8)))
y = dih(t)
# NOTE(review): as above, this `y` is overwritten by the method-call spelling below.
y = t.dihedral_batch(p=1., draw=list(range(8)))
_,axs = plt.subplots(2,4, figsize=(12,5))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax, title=f'Flip {i}')
# DeterministicDihedral cycles through the dihedral transforms call by call.
t = _batch_ex(8)
dih = DeterministicDihedral()
_,axs = plt.subplots(2,4, figsize=(12,6))
for i,ax in enumerate(axs.flatten()):
y = dih(t)
show_image(y[0], ctx=ax, title=f'Call {i}')
# Same sequence on the sat image batch.
t = sat_batch_ex(8)
dih = DeterministicDihedral()
_,axs = plt.subplots(2,4, figsize=(12,6))
for i,ax in enumerate(axs.flatten()):
y = dih(t)
show_image(y[0], ctx=ax, title=f'Call {i}')
# rotate: `draw=` pins a fixed angle per batch element instead of a random one.
thetas = [-30,-15,0,15,30]
y = _batch_ex(5).rotate(draw=thetas, p=1.)
_,axs = plt.subplots(1,5, figsize=(15,3))
for i,ax in enumerate(axs.flatten()):
show_image(y[i], ctx=ax, title=f'{thetas[i]} degrees')
Let's rewind to the satellite image:
# The same fixed rotations on the sat image batch.
thetas = [-30,-15,0,15,30]
y = sat_batch_ex(5).rotate(draw=thetas, p=1.)
_,axs = plt.subplots(1,5, figsize=(15,3))
for i,ax in enumerate(axs.flatten()):
show_image(y[i], ctx=ax, title=f'{thetas[i]} degrees')
# zoom: draw= fixes the scale per element; draw_x/draw_y=0.5 centers the zoom.
scales = [0.8, 1., 1.1, 1.25, 1.5]
n = len(scales)
y = _batch_ex(n).zoom(draw=scales, p=1., draw_x=0.5, draw_y=0.5)
fig,axs = plt.subplots(1, n, figsize=(12,3))
fig.suptitle('Center zoom with different scales')
for i,ax in enumerate(axs.flatten()):
show_image(y[i], ctx=ax, title=f'scale {scales[i]}')
This comes in handy when we're trying to zoom into an image
# Center zoom at fixed scales on the med image batch.
scales = [0.8, 1., 1.1, 1.25, 1.5]
n = len(scales)
y = med_batch_ex(n).zoom(draw=scales, p=1., draw_x=0.5, draw_y=0.5)
fig,axs = plt.subplots(1, n, figsize=(12,3))
fig.suptitle('Center zoom with different scales')
for i,ax in enumerate(axs.flatten()):
show_image(y[i], ctx=ax, title=f'scale {scales[i]}')
# Constant 1.5x zoom; the center is drawn at random for each element.
y = _batch_ex(4).zoom(p=1., draw=1.5)
fig,axs = plt.subplots(1,4, figsize=(12,3))
fig.suptitle('Constant scale and different random centers')
for i,ax in enumerate(axs.flatten()):
show_image(y[i], ctx=ax)
Going back to the OCR Example:
# Constant 1.5x zoom with random centers, on the med image batch.
y = med_batch_ex(4).zoom(p=1., draw=1.5)
fig,axs = plt.subplots(1,4, figsize=(12,3))
fig.suptitle('Constant scale and different random centers')
for i,ax in enumerate(axs.flatten()):
show_image(y[i], ctx=ax)
# Warp simulates a 3D change of viewpoint. draw_y fixes the vertical magnitude
# per element while draw_x=0 disables the horizontal component.
scales = [-0.4, -0.2, 0., 0.2, 0.4]
warp = Warp(p=1., draw_y=scales, draw_x=0.)
y = warp(_batch_ex(5), split_idx=0)
fig,axs = plt.subplots(1,5, figsize=(15,3))
fig.suptitle('Vertical warping')
for i,ax in enumerate(axs.flatten()):
show_image(y[i], ctx=ax, title=f'magnitude {scales[i]}')
# Vertical warping on the box image batch.
scales = [-0.4, -0.2, 0., 0.2, 0.4]
warp = Warp(p=1., draw_y=scales, draw_x=0.)
y = warp(box_batch_ex(5), split_idx=0)
fig,axs = plt.subplots(1,5, figsize=(15,3))
fig.suptitle('Vertical warping')
for i,ax in enumerate(axs.flatten()):
show_image(y[i], ctx=ax, title=f'magnitude {scales[i]}')
# Horizontal warping: draw_x varies, draw_y=0.
scales = [-0.4, -0.2, 0., 0.2, 0.4]
warp = Warp(p=1., draw_x=scales, draw_y=0.)
y = warp(_batch_ex(5), split_idx=0)
fig,axs = plt.subplots(1,5, figsize=(15,3))
fig.suptitle('Horizontal warping')
for i,ax in enumerate(axs.flatten()):
show_image(y[i], ctx=ax, title=f'magnitude {scales[i]}')
# Horizontal warping on the box image batch.
scales = [-0.4, -0.2, 0., 0.2, 0.4]
warp = Warp(p=1., draw_x=scales, draw_y=0.)
y = warp(box_batch_ex(5), split_idx=0)
fig,axs = plt.subplots(1,5, figsize=(15,3))
fig.suptitle('Horizontal warping')
for i,ax in enumerate(axs.flatten()):
show_image(y[i], ctx=ax, title=f'magnitude {scales[i]}')
# Lighting transforms at fixed magnitudes: brightness, contrast, saturation.
scales = [0.1, 0.3, 0.5, 0.7, 0.9]
y = _batch_ex(5).brightness(draw=scales, p=1.)
fig,axs = plt.subplots(1,5, figsize=(15,3))
for i,ax in enumerate(axs.flatten()):
show_image(y[i], ctx=ax, title=f'scale {scales[i]}')
# Brightness on the med image batch.
scales = [0.1, 0.3, 0.5, 0.7, 0.9]
y = med_batch_ex(5).brightness(draw=scales, p=1.)
fig,axs = plt.subplots(1,5, figsize=(15,3))
for i,ax in enumerate(axs.flatten()):
show_image(y[i], ctx=ax, title=f'scale {scales[i]}')
# Contrast: scale 1.0 is the identity.
scales = [0.65, 0.8, 1., 1.25, 1.55]
y = _batch_ex(5).contrast(p=1., draw=scales)
fig,axs = plt.subplots(1,5, figsize=(15,3))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax, title=f'scale {scales[i]}')
scales = [0.65, 0.8, 1., 1.25, 1.55]
y = med_batch_ex(5).contrast(p=1., draw=scales)
fig,axs = plt.subplots(1,5, figsize=(15,3))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax, title=f'scale {scales[i]}')
# Saturation: 0 = grayscale, 1 = identity, >1 oversaturates.
scales = [0., 0.5, 1., 1.5, 2.0]
y = _batch_ex(5).saturation(p=1., draw=scales)
fig,axs = plt.subplots(1,5, figsize=(15,3))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax, title=f'scale {scales[i]}')
scales = [0., 0.5, 1., 1.5, 2.0]
y = med_batch_ex(5).saturation(p=1., draw=scales)
fig,axs = plt.subplots(1,5, figsize=(15,3))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax, title=f'scale {scales[i]}')
# Visualize HSV color space: hue vs saturation grids at decreasing V (value).
fig,axs=plt.subplots(figsize=(20, 4),ncols=5)
axs[0].set_ylabel('Hue')
for ax in axs:
ax.set_xlabel('Saturation')
ax.set_yticklabels([])
ax.set_xticklabels([])
# A 210x210 HSV image: hue varies along one axis, saturation along the other, V=1.
hsv=torch.stack([torch.arange(0,2.1,0.01)[:,None].repeat(1,210),
torch.arange(0,1.05,0.005)[None].repeat(210,1),
torch.ones([210,210])])[None]
for ax,i in zip(axs,range(0,5)):
if i>0: hsv[:,2].mul_(0.80)  # darken V by 20% per panel, in place
ax.set_title('V='+'%.1f' %0.8**i)
ax.imshow(hsv2rgb(hsv)[0].permute(1,2,0))
# Hue shift at fixed magnitudes (1.0 is the identity).
scales = [0.5, 0.75, 1., 1.5, 1.75]
y = _batch_ex(len(scales)).hue(p=1., draw=scales)
fig,axs = plt.subplots(1,len(scales), figsize=(15,3))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax, title=f'scale {scales[i]}')
# Hue shift on the sat image batch.
scales = [0.5, 0.75, 1., 1.5, 1.75]
y = sat_batch_ex(len(scales)).hue(p=1., draw=scales)
fig,axs = plt.subplots(1,len(scales), figsize=(15,3))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax, title=f'scale {scales[i]}')
# Normalize with ImageNet statistics (kept on CPU: cuda=False).
nrm = Normalize.from_stats(*imagenet_stats, cuda=False)
# cutout_gaussian fills the given rectangular areas with gaussian noise; it expects
# normalized input, hence norm_apply_denorm: normalize -> f -> denormalize.
# (areas presumably given as (row0,row1,col0,col1) — confirm against fastai docs.)
f = partial(cutout_gaussian, areas=[(100,200,100,200),(200,300,200,300)])
show_image(norm_apply_denorm(timg, f, nrm)[0]);
# RandomErasing: up to 6 random rectangles erased per image (p=1).
tfm = RandomErasing(p=1., max_count=6)
_,axs = subplots(2,3, figsize=(12,6))
f = partial(tfm, split_idx=0)
for i,ax in enumerate(axs.flatten()): show_image(norm_apply_denorm(timg, f, nrm)[0], ctx=ax)
y = _batch_ex(6)
_,axs = plt.subplots(2,3, figsize=(12,6))
y = norm_apply_denorm(y, f, nrm)
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax)
# aug_transforms: the standard augmentation pipeline. mult=2 doubles the default
# magnitudes; min_scale=0.5 bounds the random-crop scale.
tfms = aug_transforms(pad_mode='zeros', mult=2, min_scale=0.5)
y = _batch_ex(9)
for t in tfms: y = t(y, split_idx=0)
_,axs = plt.subplots(1,3, figsize=(12,3))
for i,ax in enumerate(axs.flatten()): show_image(y[i], ctx=ax)
A Learner object allows you to call a tta() method like so:
# Test-time augmentation: tta() averages predictions over augmented versions
# of each validation item. Note: reuses `path`, `files` and `label_func` from above.
dls = ImageDataLoaders.from_name_func(path, files, label_func, item_tfms=Resize(224))
learn = cnn_learner(dls, resnet34, metrics=error_rate)
learn.fine_tune(1)
learn.tta()